#' Probabilistic record linkage between two data frames
#'
#' Validates its inputs, computes an agreement pattern for every variable in
#' `varnames`, tabulates those patterns, runs the EM algorithm (or reuses
#' `em.obj`), and, unless `estimate.only` is `TRUE`, extracts the estimated
#' matches, optionally dedupes them and optionally reweights the posterior by
#' first name.
#'
#' The comparison, counting, EM, matching, deduplication and reweighting steps
#' are delegated to helpers that are not defined in this function
#' (`gammaCKpar()`, `gammaCK2par()`, `gammaNUMCKpar()`, `gammaNUMCK2par()`,
#' `gammaKpar()`, `tableCounts()`, `emlinklog()`, `emlinkMARmov()`,
#' `matchesLink()`, `getPatterns()`, `dedupeMatches()`, `getPosterior()`,
#' `nameReweight()`); they must be available on the search path.
#'
#' Progress messages are written with `cat()`.
#'
#' @param dfA,dfB Data frames to link. Tibbles and data.tables are converted
#'   with `as.data.frame()`. If the two are `identical()`, the call is treated
#'   as deduplication of one data set: `dedupe.matches` and `return.all` are
#'   forced to `FALSE` and the class `"fastLink.dedupe"` is appended to the
#'   result.
#' @param varnames Non-empty character vector of column names present in both
#'   `dfA` and `dfB`.
#' @param stringdist.match Subset of `varnames` compared by string distance.
#' @param stringdist.method One of `"jw"`, `"jaro"` or `"lv"`; forwarded as
#'   `method` to the string-distance helpers.
#' @param numeric.match Subset of `varnames` compared numerically. The columns
#'   must have class exactly `"numeric"` in both data frames and must not also
#'   appear in `stringdist.match`.
#' @param partial.match Subset of `varnames` that also appears in
#'   `stringdist.match` or `numeric.match`; these variables are routed to the
#'   helpers that receive both an agreement and a partial cut-off.
#' @param cut.a,cut.p Cut-offs forwarded to the string-distance helpers.
#' @param jw.weight Forwarded as `w` to the string-distance helpers. When
#'   `stringdist.method` is `"jw"` it must lie in `[0, 0.25]`.
#' @param cut.a.num,cut.p.num Cut-offs forwarded to the numeric helpers.
#' @param priors.obj Optional list from which the elements `lambda.prior` and
#'   `pi.prior` are read. `pi.prior` requires `lambda.prior`. Ignored (together
#'   with `w.lambda`, `w.pi`, `address.field`, `gender.field`) when
#'   `cond.indep` is `FALSE`.
#' @param w.lambda,w.pi Forwarded to `emlinkMARmov()`.
#' @param address.field,gender.field,firstname.field Each at most one name from
#'   `varnames`; converted to logical indicators over `varnames` before being
#'   forwarded.
#' @param estimate.only If `TRUE`, return only the EM object. Reset to `FALSE`
#'   when `em.obj` is supplied.
#' @param em.obj Optional object inheriting from `"fastLink.EM"` used instead
#'   of running the EM algorithm.
#' @param dedupe.matches If `TRUE` and matches exist, run `dedupeMatches()`.
#' @param linprog.dedupe Forwarded as `linprog` to `dedupeMatches()`.
#' @param reweight.names If `TRUE` (requires `firstname.field`) and matches
#'   exist, the posterior is replaced by the output of `nameReweight()`.
#' @param cond.indep If `TRUE` the EM step uses `emlinkMARmov()`, otherwise
#'   `emlinklog()`.
#' @param n.cores Forwarded to the parallel helpers.
#' @param tol.em Forwarded as `tol` to the EM helper.
#' @param threshold.match Forwarded as `thresh` to `matchesLink()`; overridden
#'   with `0.001` when `return.all` is `TRUE`.
#' @param return.all If `TRUE`, lowers `threshold.match` to `0.001` and adds
#'   the class `"confusionTable"` to the result.
#' @param return.df If `TRUE`, the matched rows of `dfA` and `dfB` are included
#'   in the result as `dfA.match` and `dfB.match`.
#' @param verbose If `TRUE`, print per-variable and timing information.
#'
#' @return When `estimate.only` is `TRUE`, the EM object. Otherwise a list of
#'   class `"fastLink"` with elements `matches`, `EM`, `patterns`, `nobs.a`,
#'   `nobs.b`, `posterior` (only when at least one match exists) and, when
#'   `return.df` is `TRUE`, `dfA.match` and `dfB.match`.
my_fastLink <- function(dfA, dfB, varnames,
                     stringdist.match = NULL, 
                     stringdist.method = "jw",
                     numeric.match = NULL, 
                     partial.match = NULL,
                     cut.a = 0.94, cut.p = 0.88,
                     jw.weight = .10,
                     cut.a.num = 1, cut.p.num = 2.5,
                     priors.obj = NULL,
                     w.lambda = NULL, w.pi = NULL, address.field = NULL,
                     gender.field = NULL, estimate.only = FALSE, em.obj = NULL,
                     dedupe.matches = TRUE, linprog.dedupe = FALSE,
                     reweight.names = FALSE, firstname.field = NULL, cond.indep = TRUE,
                     n.cores = NULL, tol.em = 1e-04, threshold.match = 0.85,
                     return.all = FALSE, return.df = FALSE, verbose = FALSE){
  
  banner <- strrep("=", 20)
  cat("\n")
  cat(banner, "\n")
  cat("fastLink(): Fast Probabilistic Record Linkage\n")
  cat(banner, "\n\n")
  
  ## Prints "<task> took <x> <unit.label>." when verbose; the elapsed time is
  ## measured from `start` to the moment this helper is called.
  report_elapsed <- function(task, start, units = "mins", unit.label = "minutes"){
    if(verbose){
      cat(task, "took",
          round(difftime(Sys.time(), start, units = units), 2),
          paste0(unit.label, ".\n\n"))
    }
  }
  
  ## --------------------------------------
  ## Process inputs and stop if not correct
  ## --------------------------------------
  if(inherits(dfA, c("tbl_df", "data.table"))){
    dfA <- as.data.frame(dfA)
  }
  if(inherits(dfB, c("tbl_df", "data.table"))){
    dfB <- as.data.frame(dfB)
  }
  ## An empty varnames would otherwise fail later with an obscure NA error
  if(length(varnames) == 0){
    stop("'varnames' must contain at least one variable name.")
  }
  if(!all(varnames %in% names(dfA))){
    stop("Some variables in varnames are not present in dfA.")
  }
  if(!all(varnames %in% names(dfB))){
    stop("Some variables in varnames are not present in dfB.")
  }
  if(!all(stringdist.match %in% varnames)){
    stop("You have provided a variable name for stringdist.match that is not in 'varnames'.")
  }
  if(!all(numeric.match %in% varnames)){
    stop("You have provided a variable name for numeric.match that is not in 'varnames'.")
  }
  if(length(intersect(numeric.match, stringdist.match)) > 0){
    stop("There is a variable present in both 'numeric.match' and 'stringdist.match'. Please select only one matching metric for each variable.")
  }
  ## c() drops a NULL numeric.match, so one test covers both cases
  if(!all(partial.match %in% varnames) ||
     !all(partial.match %in% c(stringdist.match, numeric.match))){
    stop("You have provided a variable name for 'partial.match' that is not present in either 'varnames', 'numeric.match', or 'stringdist.match'.")
  }
  if(!is.null(address.field)){
    ## Only address.field is checked here; gender.field has its own check below
    if(length(address.field) > 1){
      stop("'address.field' must have at most only one variable name.")
    }
    if(!(address.field %in% varnames)){
      stop("You have provided a variable name for 'address.field' that is not in 'varnames'.")
    }
  }
  if(!is.null(gender.field)){
    if(length(gender.field) > 1){
      stop("'gender.field' must have at most one variable name.")
    }
    if(!(gender.field %in% varnames)){
      stop("You have provided a variable name for 'gender.field' that is not in 'varnames'.")
    }
  }
  if(reweight.names == TRUE && is.null(firstname.field)){
    stop("If reweighting the match probability by first name, you must provide the name of the field representing first name.")
  }
  if(!is.null(firstname.field)){
    if(length(firstname.field) > 1){
      stop("'firstname.field' must have at most one variable name.")
    }
    if(!(firstname.field %in% varnames)){
      stop("You have provided a variable name for 'firstname.field' that is not in 'varnames'.")
    }
  }
  if(!is.null(em.obj) && !inherits(em.obj, "fastLink.EM")){
    stop("If providing an EM object, it must be of class 'fastLink.EM'.")
  }
  if(!is.null(em.obj) && estimate.only){
    estimate.only <- FALSE
    cat("You have provided an EM object but have set 'estimate.only' to TRUE. Setting 'estimate.only' to FALSE so that matched indices are returned.\n")
  }
  if(!(stringdist.method %in% c("jw", "jaro", "lv"))){
    stop("Invalid string distance method. Method should be one of 'jw', 'jaro', or 'lv'.")
  }
  if(stringdist.method == "jw" && !is.null(jw.weight)){
    if(jw.weight < 0 || jw.weight > 0.25){
      stop("Invalid value provided for jw.weight. Remember, jw.weight in [0, 0.25].")
    }
  }
  if(return.all){
    threshold.match <- 0.001
    if(!dedupe.matches){
      cat("You have specified that all matches be returned but are not deduping the matches. The resulting object may be very large.\n")
    }
  }else{
    cat("If you set return.all to FALSE, you will not be able to calculate a confusion table as a summary statistic.\n")
  }
  if(!is.null(priors.obj) && cond.indep == FALSE){
    cat("The current implementation of fastLink can only incorporate prior information under the conditionally independent model. Ignoring prior information in estimation.")
    priors.obj <- NULL
    w.lambda <- NULL
    w.pi <- NULL
    address.field <- NULL
    gender.field <- NULL
  }
  
  ## Check class of numeric indicators.
  ## Columns are inspected one at a time with `[[` so the check also works when
  ## 'varnames' has a single element (where df[, varnames] would drop to a
  ## vector and the check would be skipped).
  all_plain_numeric <- function(df){
    all(vapply(numeric.match,
               function(v) identical(class(df[[v]]), "numeric"),
               logical(1)))
  }
  if(!all_plain_numeric(dfA) || !all_plain_numeric(dfB)){
    stop("You have specified that a variable be compared using numeric matching, but that variable is not of class 'numeric'. Please check your variable classes.")
  }
  
  ## Check if data frames are identical
  ## NOTE(review): if return.all was TRUE, threshold.match has already been
  ## lowered to 0.001 above and is not restored here.
  dedupe.df <- FALSE
  if(identical(dfA, dfB)){
    cat("dfA and dfB are identical, assuming deduplication of a single data set.\nSetting return.all to FALSE.\n\n")
    dedupe.matches <- FALSE
    return.all <- FALSE
    dedupe.df <- TRUE
  }
  
  ## Create boolean indicators: one logical per entry of 'varnames'
  stringdist.match <- varnames %in% stringdist.match
  numeric.match    <- varnames %in% numeric.match
  partial.match    <- varnames %in% partial.match
  address.field    <- varnames %in% address.field
  gender.field     <- varnames %in% gender.field
  firstname.field  <- varnames %in% firstname.field
  
  ## ----------------------------
  ## Calculate agreement patterns
  ## ----------------------------
  cat("Calculating matches for each variable.\n")
  start <- Sys.time()
  gammalist <- vector(mode = "list", length = length(varnames))
  for(i in seq_along(gammalist)){
    var <- varnames[i]
    if(verbose){
      matchtype <- if(stringdist.match[i]){
        "string-distance"
      }else if(numeric.match[i]){
        "numeric"
      }else{
        "exact"
      }
      cat("    Matching variable", var, "using", matchtype, "matching.\n")
    }
    ## Convert to character
    if(is.factor(dfA[[var]]) || is.factor(dfB[[var]])){
      dfA[[var]] <- as.character(dfA[[var]])
      dfB[[var]] <- as.character(dfB[[var]])
    }
    colA <- dfA[[var]]
    colB <- dfB[[var]]
    ## Warn if no variation (except for gender blocking)
    if(!gender.field[i]){
      if(all(is.na(colA)) || length(unique(colA)) == 1){
        cat(paste("WARNING: You have no variation in dataset A for", var, "or all observations are missing."))
      }
      if(all(is.na(colB)) || length(unique(colB)) == 1){
        cat(paste("WARNING: You have no variation in dataset B for", var, "or all observations are missing."))
      }
    }
    if(!any(colA %in% colB)){
      cat(paste0("WARNING: You have no exact matches for ", var, "."))
    }
    ## Get patterns
    if(stringdist.match[i]){
      if(partial.match[i]){
        gammalist[[i]] <- gammaCKpar(
          colA, colB, cut.a = cut.a, cut.p = cut.p, method = stringdist.method, w = jw.weight, n.cores = n.cores
        )
      }else{
        gammalist[[i]] <- gammaCK2par(
          colA, colB, cut.a = cut.a, method = stringdist.method, w = jw.weight, n.cores = n.cores
        )
      }
    }else if(numeric.match[i]){
      if(partial.match[i]){
        gammalist[[i]] <- gammaNUMCKpar(
          colA, colB, cut.a = cut.a.num, cut.p = cut.p.num, n.cores = n.cores
        )
      }else{
        gammalist[[i]] <- gammaNUMCK2par(
          colA, colB, cut.a = cut.a.num, n.cores = n.cores
        )
      }
    }else{
      gammalist[[i]] <- gammaKpar(colA, colB, gender = gender.field[i], n.cores = n.cores)
    }
  }
  report_elapsed("Calculating matches for each variable", start)
  
  ## Get row numbers
  nr_a <- nrow(dfA)
  nr_b <- nrow(dfB)
  
  ## ------------------------------
  ## Get counts for zeta parameters
  ## ------------------------------
  cat("Getting counts for parameter estimation.\n")
  start <- Sys.time()
  counts <- tableCounts(gammalist, nobs.a = nr_a, nobs.b = nr_b, n.cores = n.cores)
  report_elapsed("Getting counts for parameter estimation", start)
  
  ## ------------------------------
  ## Run or impute the EM algorithm
  ## ------------------------------
  if(is.null(em.obj)){
    ## Run EM algorithm
    cat("Running the EM algorithm.\n")
    start <- Sys.time()
    ## Both priors default to NULL so they are always defined, even when
    ## priors.obj is supplied without the corresponding element.
    lambda.prior <- NULL
    pi.prior <- NULL
    if(!is.null(priors.obj)){
      prior.names <- names(priors.obj)
      has.lambda <- "lambda.prior" %in% prior.names
      if(has.lambda){
        lambda.prior <- priors.obj[["lambda.prior"]]
      }
      if("pi.prior" %in% prior.names){
        if(!has.lambda){
          stop("Must specify a prior for lambda if providing a prior for pi.")
        }
        pi.prior <- priors.obj[["pi.prior"]]
      }
    }
    if(cond.indep == FALSE){
      resultsEM <- emlinklog(patterns = counts, nobs.a = nr_a, nobs.b = nr_b,
                             tol = tol.em, varnames = varnames)  
    }else{
      resultsEM <- emlinkMARmov(patterns = counts, nobs.a = nr_a, nobs.b = nr_b,
                                tol = tol.em,
                                prior.lambda = lambda.prior, w.lambda = w.lambda,
                                prior.pi = pi.prior, w.pi = w.pi,
                                address.field = address.field, 
                                gender.field = gender.field,
                                varnames = varnames)
    }
    report_elapsed("Running the EM algorithm", start, units = "secs", unit.label = "seconds")
  }else{
    cat("Imputing matching probabilities using provided EM object.\n")
    resultsEM <- em.obj
  }
  
  if(max(resultsEM$zeta.j) < threshold.match) {
    warning(paste0("No matches found for the threshold value used. We recommend trying a lower threshold.match value. Note that you currently have threshold.match set to ", threshold.match, "."))
  }
  
  ## With estimate.only the EM object itself is the result
  if(estimate.only){
    return(resultsEM)
  }
  
  ## -----------------------------------------------
  ## Get the estimated matches, dedupe, and reweight
  ## -----------------------------------------------
  
  ## Get matches
  cat("Getting the indices of estimated matches.\n")
  start <- Sys.time()
  matches <- matchesLink(gammalist, nobs.a = nr_a, nobs.b = nr_b,
                         em = resultsEM, thresh = threshold.match,
                         n.cores = n.cores)
  report_elapsed("Getting the indices of estimated matches", start)
  
  ## Get the patterns (needed as input by the dedupe / posterior step)
  patterns <- getPatterns(matchesA = dfA[matches$inds.a, ], matchesB = dfB[matches$inds.b, ],
                          varnames = varnames, stringdist.match = stringdist.match,
                          numeric.match = numeric.match, partial.match = partial.match,
                          stringdist.method = stringdist.method,
                          cut.a = cut.a, cut.p = cut.p, jw.weight = jw.weight,
                          cut.a.num = cut.a.num, cut.p.num = cut.p.num)
  
  ## Run deduplication
  if(dedupe.matches && length(matches$inds.a) > 0){
    cat("Deduping the estimated matches.\n")
    start <- Sys.time()
    ddm.out <- dedupeMatches(matchesA = dfA[matches$inds.a,], matchesB = dfB[matches$inds.b,],
                             EM = resultsEM, matchesLink = matches, patterns = patterns,
                             linprog = linprog.dedupe)
    matches <- ddm.out$matchesLink
    resultsEM <- ddm.out$EM
    report_elapsed("Deduping the estimated matches", start)
  }else if(length(matches$inds.a) > 0){
    cat("Calculating the posterior for each pair of matched observations.\n")
    start <- Sys.time()
    zeta <- getPosterior(dfA[matches$inds.a,], dfB[matches$inds.b,], EM = resultsEM,
                         patterns = patterns)
    report_elapsed("Calculating the posterior for each matched pair", start)
  }
  
  ## Get the patterns again: 'matches' may have been replaced by deduplication
  cat("Getting the match patterns for each estimated match.\n")
  start <- Sys.time()
  patterns <- getPatterns(matchesA = dfA[matches$inds.a, ], matchesB = dfB[matches$inds.b, ],
                          varnames = varnames, stringdist.match = stringdist.match,
                          numeric.match = numeric.match, partial.match = partial.match,
                          stringdist.method = stringdist.method,
                          cut.a = cut.a, cut.p = cut.p, jw.weight = jw.weight,
                          cut.a.num = cut.a.num, cut.p.num = cut.p.num)
  report_elapsed("Getting the match patterns for each estimated match", start)
  
  ## Reweight first names or get zeta
  if(reweight.names && length(matches$inds.a) > 0){
    cat("Reweighting match probabilities by frequency of occurrence.\n")
    start <- Sys.time()
    rwn.out <- nameReweight(dfA, dfB, EM = resultsEM, gammalist = gammalist, matchesLink = matches,
                            varnames = varnames, firstname.field = firstname.field,
                            patterns = patterns, threshold.match = threshold.match, n.cores = n.cores)
    report_elapsed("Reweighting by first name", start)
  }
  
  ## Return object
  out <- list()
  if(return.df){
    out[["dfA.match"]] <- dfA[matches$inds.a,]
    out[["dfB.match"]] <- dfB[matches$inds.b,]
  }
  out[["matches"]] <- matches
  out[["EM"]] <- resultsEM
  out[["patterns"]] <- patterns
  if(dedupe.matches && length(matches$inds.a) > 0){
    out[["posterior"]] <- ddm.out$max.zeta
  }else if(length(matches$inds.a) > 0){
    out[["posterior"]] <- zeta
  }
  ## The reweighted posterior, when computed, overrides the one set above
  if(reweight.names && length(matches$inds.a) > 0){
    out[["posterior"]] <- rwn.out
  }
  out[["nobs.a"]] <- nr_a
  out[["nobs.b"]] <- nr_b
  if(return.all){
    class(out) <- c("fastLink", "confusionTable")
  }else{
    class(out) <- "fastLink"
  }
  if(dedupe.df){
    class(out) <- c(class(out), "fastLink.dedupe")
  }
  
  return(out)
  
}